###
### Code to replicate tables and figures in the appendix
###




###
### Appendix B
###
load("NationalLegislatorsData.RData")

# table B.1
t.test(df$n.proposal[df$reserve==0], df$n.proposal[df$reserve==1])
t.test(df$n.cosponsor[df$reserve==0], df$n.cosponsor[df$reserve==1])
t.test(log(df$between[df$reserve==0]+1), log(df$between[df$reserve==1]+1))
t.test(log(df$ego_frac[df$reserve==0]+1), log(df$ego_frac[df$reserve==1]+1))




###
### Appendix C
###
library(rstan) # in this analysis, I use rstan: version 2.18.2
rstan_options(auto_write=TRUE)
options(mc.cores=3)

# multi-level negative binomial model with random slopes
nb.model2 <- "
data{
  int N;
  int K;

  int y[N];
  
  int x[N];
  
  matrix[N,K] Z;

  int<lower=0> P;
  int<lower=0, upper=P> party[N];

  int<lower=0> S;
  int<lower=0, upper=S> province[N];
}

parameters{
  real alpha;
  real beta_mu;
  vector[P] beta_p;
  
  vector[K] delta;

  vector[P] nu_p;
  vector[S] nu_s;
  
  real<lower=0> phi;
  
  real<lower=0, upper=100> sigma_b;
  real<lower=0, upper=100> sigma_p;
  real<lower=0, upper=100> sigma_s;
}

transformed parameters{
  vector[N] mu;
  
  for(i in 1:N){
    mu[i] <- alpha + beta_p[party[i]]*x[i] + Z[i,]*delta + nu_p[party[i]] + nu_s[province[i]];
  }
}

model{
  y ~ neg_binomial_2_log(mu, phi);

  beta_p ~ normal(beta_mu, sigma_b);
  nu_p ~ normal(0, sigma_p);
  nu_s ~ normal(0, sigma_s);
}
"

sm.nb2 <- stan_model(model_code=nb.model2)

load("NationalLegislatorsData.RData")

# party and province IDs
df$partyID <- as.numeric(as.factor(df$party))
df$provinceID <- as.numeric(as.factor(df$province))

# three types of legislators 
# type2 = SMDP female
# type3 = RS female
# type4 = RS religious minority
df$type2 <- 0
df$type2[df$type==2] <- 1
df$type3 <- 0
df$type3[df$type==3] <- 1
df$type4 <- 0
df$type4[df$type==4] <- 1

round(cor(df[,c("type2", "type3", "type4")]), 2)

party_reserve <- NULL
for(i in unique(df$party)){
  wr <- sum(df$type3[df$party==i])
  rr <- sum(df$type4[df$party==i])
  
  if(sum(wr, rr) > 0){
    party_reserve <- rbind(party_reserve, data.frame(i, wr, rr))
  }
}

#library(stargazer)
#stargazer(t(t(party_reserve)))

prop.dat1 <- list(N=nrow(df),
                  y=df$n.proposal,
                  x=df$reserve,
                  Z=cbind(df$term, df$committee_chair),
                  K=2,
                  P=length(unique(df$partyID)),
                  party=df$partyID,
                  S=length(unique(df$provinceID)),
                  province=df$provinceID)

# random slope model for the number of proposals
prop.fit.slp <- sampling(sm.nb2, data=prop.dat1, iter=15000, warmup=10000, chains=3,
                         seed=1234)

cos.dat1 <- list(N=nrow(df),
                 y=df$n.cosponsor, 
                 x=df$reserve,
                 Z=cbind(df$term, df$committee_chair),
                 K=2,
                 P=length(unique(df$partyID)),
                 party=df$partyID,
                 S=length(unique(df$provinceID)),
                 province=df$provinceID)

# random slope model for the number of cosponsors
cos.fit.slp <- sampling(sm.nb2, data=cos.dat1, iter=15000, warmup=10000, chains=3,
                        seed=1234)

#save(prop.fit.slp, cos.fit.slp, file="RandomSlopeStan.RData")

load("RandomSlopeStan.RData")

# unique parties
parties <- unique(df[,c("party", "partyID")])
parties <- parties[order(parties$partyID),]

# models 1 and 3 of table 1
load("PakistanStan.RData")
prop.bm <- summary(prop.fit1)$summary[2,c(1,4,8)]
cos.bm <- summary(cos.fit1)$summary[2,c(1,4,8)]

prop.tab <- summary(prop.fit.slp)$summary[,c(1,4,8)]
prop.tab <- as.data.frame(prop.tab[grep("beta_p", row.names(prop.tab)),])
prop.tab$party <- parties$party
prop.tab <- prop.tab[order(prop.tab$mean),]

prop.tab$rin <- NA
for(i in 1:nrow(prop.tab)){
  rn <- which(df$party==prop.tab$party[i])
  
  if(any(df$reserve[rn]==1)){
    prop.tab$rin[i] <- 1
  }
  else{
    prop.tab$rin[i] <- 0
  }
}

prop.tab <- prop.tab[prop.tab$rin==1,]

cos.tab <- summary(cos.fit.slp)$summary[,c(1,4,8)]
cos.tab <- as.data.frame(cos.tab[grep("beta_p", row.names(cos.tab)),])
cos.tab$party <- parties$party
cos.tab <- cos.tab[order(cos.tab$mean),]

cos.tab$rin <- NA
for(i in 1:nrow(cos.tab)){
  rn <- which(df$party==cos.tab$party[i])
  
  if(any(df$reserve[rn]==1)){
    cos.tab$rin[i] <- 1
  }
  else{
    cos.tab$rin[i] <- 0
  }
}

cos.tab <- cos.tab[cos.tab$rin==1,]

# figure C.1
#pdf("ranslope.pdf", width=7, height=8)
par(mfrow=c(2,1))

plot(1:nrow(prop.tab), prop.tab$mean, type="n",
     ylim=c(min(prop.tab$`2.5%`)-1, max(prop.tab$`97.5%`)+1),
     ylab="Posterior Estimate of RS", xlab="", xaxt="n",
     main="Number of Proposals")
polygon(c(0:25, rev(0:25)), 
        c(rep(prop.bm[2], times=26), rep(prop.bm[3], times=26)),
        border=NA, col=rgb(0.1,0.1,0.1,0.1))
abline(h=prop.bm[1], col="gray70", lwd=1.5)
points(1:nrow(prop.tab), prop.tab$mean, pch=20)
segments(1:nrow(prop.tab), prop.tab$`2.5%`, 1:nrow(prop.tab), prop.tab$`97.5%`)
#abline(h=0, lty=2)
axis(1, at=1:nrow(prop.tab), labels=prop.tab$party, las=2)

plot(1:nrow(cos.tab), cos.tab$mean, type="n",
     ylim=c(min(cos.tab$`2.5%`)-1, max(cos.tab$`97.5%`)+1),
     ylab="Posterior Estimate of RS", xlab="", xaxt="n",
     main="Number of Cosponsors")
polygon(c(0:25, rev(0:25)), 
        c(rep(cos.bm[2], times=26), rep(cos.bm[3], times=26)),
        border=NA, col=rgb(0.1,0.1,0.1,0.1))
abline(h=cos.bm[1], col="gray70", lwd=1.5)
points(1:nrow(cos.tab), cos.tab$mean, pch=20)
segments(1:nrow(cos.tab), cos.tab$`2.5%`, 1:nrow(cos.tab), cos.tab$`97.5%`)
#abline(h=0, lty=2)
axis(1, at=1:nrow(cos.tab), labels=cos.tab$party, las=2)

#dev.off()




###
### Appendix D
###
library(rstan)
rstan_options(auto_write=TRUE)
options(mc.cores=3)

logit.model <- "
data{
  int N;

  int y[N];

  vector[N] x;

  int<lower=0> P;
  int<lower=0, upper=P> party[N];
}

parameters{
  real mu_alpha;
  vector[P] alpha;
  real beta;

  real<lower=0, upper=100> sigma_a;
}

model{
  y ~ bernoulli_logit(alpha[party] + x*beta);

  alpha ~ normal(mu_alpha, sigma_a);
}
"

sm.lg <- stan_model(model_code=logit.model)

load("PunjabLegislatorsData.RData")
colnames(df)

unique(df$party)

# dummy of propose or not
df$sponsor <- ifelse(df$private==0, 0, 1)
table(df$sponsor)

# dummy of collaborate or not
df$collaborate <- ifelse(df$cosponsor==0, 0, 1)
table(df$collaborate)

# party ID
df$partyID <- as.numeric(as.factor(df$party))

# session 19 subset
df19 <- df[df$session==19,]
df19$partyID <- as.numeric(as.factor(df19$party))

# session 20 subset
df20 <- df[df$session==20,]
df20$partyID <- as.numeric(as.factor(df20$party))

sp19.dat <- list(N=nrow(df19),
                 y=df19$sponsor, 
                 x=df19$reserved,
                 P=length(unique(df19$partyID)),
                 party=df19$partyID)

# table D.1, model 1
sp19.fit <- sampling(sm.lg, data=sp19.dat, iter=15000, warmup=10000, chains=3,
                     seed=1234)

cs19.dat <- list(N=nrow(df19),
                 y=df19$collaborate, 
                 x=df19$reserved,
                 P=length(unique(df19$partyID)),
                 party=df19$partyID)

# table D.1, model 2
cs19.fit <- sampling(sm.lg, data=cs19.dat, iter=15000, warmup=10000, chains=3,
                     seed=1234)

sp20.dat <- list(N=nrow(df20),
                 y=df20$sponsor, 
                 x=df20$reserved,
                 P=length(unique(df20$partyID)),
                 party=df20$partyID)


sp20.fit <- sampling(sm.lg, data=sp20.dat, iter=15000, warmup=10000, chains=3,
                     seed=1234)

#save(sp19.fit, cs19.fit, sp20.fit, file="PunjabStan.RData")

load("PUnjabStan.RData")

# table D.1
round(summary(sp19.fit)$summary[c(8,9),c(1,3)], 3)
round(summary(cs19.fit)$summary[c(8,9),c(1,3)], 3)
round(summary(sp20.fit)$summary[c(12,13),c(1,3)], 3)




###
### Appendix E
###
library(statnet);library(latentnet)

load("NationalLegislatorsData.RData")
load("CosponsorshipNetwork.RData")

# dummy for governing party
df$gov.party <- 0
df$gov.party[df$party=="PML(N)"] <- 1
table(df$gov.party)

cos.c <- as.network(cosponsor, loops=FALSE, directed=FALSE,
                    ignore.eval=FALSE, names.eval="count")
set.vertex.attribute(cos.c, "reserved", df$reserve+1)
set.vertex.attribute(cos.c, "type", df$type)
set.vertex.attribute(cos.c, "term", df$term)
set.vertex.attribute(cos.c, "committee_chair", df$committee_chair)
set.vertex.attribute(cos.c, "gov.party", df$gov.party)
set.vertex.attribute(cos.c, "party", df$party)
set.vertex.attribute(cos.c, "province", df$province)

# fit six latent network models with different numbers of latent dimensions
lsm.f1 <- ergmm(cos.c ~ nodefactor("reserved", base=1) + nodefactor("gov.party")
                + nodecov("term") + nodefactor("committee_chair") 
                + nodematch("party") + nodematch("province")
                + nodemix("reserved", base=c(1,1))
                + euclidean(d=1),
                family="Bernoulli", response="count", tofit="mle")
#summary(lsm.f1)

lsm.f2 <- ergmm(cos.c ~ nodefactor("reserved", base=1) + nodefactor("gov.party")
                + nodecov("term") + nodefactor("committee_chair") 
                + nodematch("party") + nodematch("province")
                + nodemix("reserved", base=c(1,1))
                + euclidean(d=2),
                family="Bernoulli", response="count", tofit="mle")
#summary(lsm.f2)

lsm.f3 <- ergmm(cos.c ~ nodefactor("reserved", base=1) + nodefactor("gov.party")
                + nodecov("term") + nodefactor("committee_chair") 
                + nodematch("party") + nodematch("province")
                + nodemix("reserved", base=c(1,1))
                + euclidean(d=3),
                family="Bernoulli", response="count", tofit="mle")
#summary(lsm.f3)

lsm.f4 <- ergmm(cos.c ~ nodefactor("reserved", base=1) + nodefactor("gov.party")
                + nodecov("term") + nodefactor("committee_chair") 
                + nodematch("party") + nodematch("province")
                + nodemix("reserved", base=c(1,1))
                + euclidean(d=4),
                family="Bernoulli", response="count", tofit="mle")
#summary(lsm.f4)

lsm.f5 <- ergmm(cos.c ~ nodefactor("reserved", base=1) + nodefactor("gov.party")
                + nodecov("term") + nodefactor("committee_chair") 
                + nodematch("party") + nodematch("province")
                + nodemix("reserved", base=c(1,1))
                + euclidean(d=5),
                family="Bernoulli", response="count", tofit="mle")
#summary(lsm.f5)

lsm.f6 <- ergmm(cos.c ~ nodefactor("reserved", base=1) + nodefactor("gov.party")
                + nodecov("term") + nodefactor("committee_chair") 
                + nodematch("party") + nodematch("province")
                + nodemix("reserved", base=c(1,1))
                + euclidean(d=6),
                family="Bernoulli", response="count", tofit="mle")
#summary(lsm.f6)

# model in table E.1
lsm.f3_2 <- ergmm(cos.c ~ nodefactor("type") + nodefactor("gov.party")
                  + nodecov("term") + nodefactor("committee_chair") 
                  + nodematch("party") + nodematch("province")
                  + nodemix("type", base=c(1,1))
                  + euclidean(d=3),
                  family="Bernoulli", response="count", tofit="mle")
#summary(lsm.f3_2)

#save(lsm.f1, lsm.f2, lsm.f3, lsm.f4, lsm.f5, lsm.f6,
#     lsm.f3_2,
#     file="LatentPredictions.RData")

load("LatentPredictions.RData")

# calculate true positive rates
getTruePositive <- function(model){
  library(Rlab)
  
  ad <- as.vector(cosponsor)
  
  pd <- as.vector(predict(model, type="mle"))
  #summary(pd)
  
  trials <- sapply(1:length(ad), function(p){rbern(n=1000, prob=pd[p])})
  
  correct <- sapply(1:ncol(trials), function(p){
    sum(trials[,p]==ad[p])/1000
  })
  
  return(mean(correct[ad==1]))
}

fs <- c(getTruePositive(lsm.f1), getTruePositive(lsm.f2),
        getTruePositive(lsm.f3), getTruePositive(lsm.f4),
        getTruePositive(lsm.f5), getTruePositive(lsm.f6))
fs

# figure E.1
#pdf("ndimension.pdf", width=7, height=5.5)

par(mar=c(5.1,4.1,2.1,2.1))
plot(1:6, 1- fs, type="o", pch=19, ylim=c(0,0.6),
     xlab="Number of Latent Dimensions", ylab="Average False Negative Rate")

#dev.off()

# table E.1
summary(lsm.f3_2)




###
### Appendix F
###
library(igraph)

load("CosponsorshipNetwork.RData")

cosponsor.sub <- cosponsor[colSums(cosponsor) != 0, colSums(cosponsor) != 0]
net <- graph.adjacency(cosponsor.sub, mode="undirected", diag=FALSE)

hc <- fastgreedy.community(net)

group <- table(df$party[colSums(cosponsor) != 0], as.vector(membership(hc)))

# table F.1
group